## these are helper functions to grab commonly desired census data
## at the bottom is a function that calls all the others


pop_dens <-  function(geo, year, state = NULL, county = NULL){
  library(tigris)
  library(tidyverse)
  library(tidycensus)
  if(is.null(state)){
    pd <- rbindlist(lapply(unique(fips_codes[as.numeric(fips_codes$state_code) < 60, ]$state), function(state){
      if(geo == "block group"){
        area <- dplyr::select(tigris::block_groups(year = year, state = state, county = county), GEOID, area = ALAND) %>%
          mutate(area = as.numeric(area) * 0.00000038610)
      }else if(geo == "tract"){
        area <- dplyr::select(tigris::tracts(year = year, state = state, county = county), GEOID, area = ALAND) %>%
          mutate(area = as.numeric(area) * 0.00000038610)
      }else if(geo == "county"){
        area <- dplyr::select(tigris::tracts(year = year, state = state, county = county), GEOID, area = ALAND) %>%
          mutate(area = as.numeric(area) * 0.00000038610,
                 GEOID = substring(GEOID, 1, 5)) %>%
          group_by(GEOID) %>%
          summarize(area = sum(area))
      }else if(geo == "county subdivision"){
        area <- dplyr::select(tigris::county_subdivisions(year = year, state = state, county = county), GEOID, area = ALAND) %>%
          mutate(area = as.numeric(area) * 0.00000038610) %>%
          group_by(GEOID) %>%
          summarize(area = sum(area))
      }else if(geo == "place"){
        area <- dplyr::select(tigris::places(year = year, state = state), GEOID, area = ALAND) %>%
          mutate(area = as.numeric(area) * 0.00000038610) %>%
          group_by(GEOID) %>%
          summarize(area = sum(area))
      }else if(geo == "state"){
        area <- dplyr::select(tigris::tracts(year = year, state = state, county = county), GEOID, area = ALAND) %>%
          mutate(area = as.numeric(area) * 0.00000038610,
                 GEOID = substring(GEOID, 1, 2)) %>%
          group_by(GEOID) %>%
          summarize(area = sum(area))
      }
      
      pop <- get_acs(geography = geo,
                     variables = c(population = "B01003_001"),
                     state = state, county = county, year = year) %>%
        dplyr::select(-variable, -moe) %>%
        dplyr::rename(population = estimate)
      
      pd <- inner_join(pop, area) %>%
        mutate(pop_dens = population / area) %>%
        select(GEOID, pop_dens)
      return(pd)
    }))
  }else{
    if(geo == "block group"){
      area <- dplyr::select(tigris::block_groups(state = state, county = county, year = year), GEOID, area = ALAND) %>%
        mutate(area = as.numeric(area) * 0.00000038610)
    }else if(geo == "tract"){
      area <- dplyr::select(tigris::tracts(state = state, county = county, year = year), GEOID, area = ALAND) %>%
        mutate(area = as.numeric(area) * 0.00000038610)
    }else if(geo == "county"){
      area <- dplyr::select(tigris::tracts(state = state, county = county, year = year), GEOID, area = ALAND) %>%
        mutate(area = as.numeric(area) * 0.00000038610,
               GEOID = substring(GEOID, 1, 5)) %>%
        group_by(GEOID) %>%
        summarize(area = sum(area))
    }else if(geo == "county subdivision"){
      area <- dplyr::select(tigris::county_subdivisions(state = state, county = county, year = year), GEOID, area = ALAND) %>%
        mutate(area = as.numeric(area) * 0.00000038610) %>%
        group_by(GEOID) %>%
        summarize(area = sum(area))
    }else if(geo == "place"){
      area <- dplyr::select(tigris::places(state = state, year = year), GEOID, area = ALAND) %>%
        mutate(area = as.numeric(area) * 0.00000038610) %>%
        group_by(GEOID) %>%
        summarize(area = sum(area))
    }else if(geo == "state"){
      area <- dplyr::select(tigris::tracts(state = state, county = county, year = year), GEOID, area = ALAND) %>%
        mutate(area = as.numeric(area) * 0.00000038610,
               GEOID = substring(GEOID, 1, 2)) %>%
        group_by(GEOID) %>%
        summarize(area = sum(area))
    }
    
    pop <- get_acs(geography = geo,
                   variables = c(population = "B01003_001"),
                   state = state, county = county, year = year) %>%
      dplyr::select(-variable, -moe) %>%
      dplyr::rename(population = estimate)
    
    pd <- inner_join(pop, area) %>%
      mutate(pop_dens = population / area) %>%
      select(GEOID, pop_dens)
    return(pd)
  }
  return(pd)
}


census_race_ethnicity <- function(geo, year, state = NULL, county = NULL){
  library(tidycensus)
  library(tidyverse)
  race_ethnicity <- get_acs(geography = geo,
                            variables = c(nh_white = "B03002_003",
                                          nh_black = "B03002_004",
                                          latino = "B03002_012",
                                          latino_black = "B03002_014",
                                          native_american = "B03002_005",
                                          hawaiian_pac_island = "B03002_007",
                                          asian = "B03002_006"),
                            summary_var = c(population = "B03002_001"),
                            state = state, county = county, year = year) %>%
    dplyr::mutate(estimate = estimate / summary_est) %>%
    dplyr::select(-ends_with("moe")) %>%
    dplyr::rename(population = summary_est) %>%
    spread(variable, estimate)
}

census_income <- function(geo, year, state = NULL, county = NULL){
  library(tidycensus)
  library(tidyverse)
  income <- get_acs(geography = geo,
                    variables = c(medincome = "B19013_001"),
                    state = state, county = county, year = year) %>%
    dplyr::select(-variable, -moe) %>%
    dplyr::rename(median_income = estimate)
}

census_education <- function(geo, year, state = NULL, county = NULL){
  library(tidycensus)
  library(tidyverse)
  education <- get_acs(geography = geo,
                       variables = c("B15002_012",
                                     "B15002_013",
                                     "B15002_014",
                                     "B15002_015",
                                     "B15002_016",
                                     "B15002_017",
                                     "B15002_018",
                                     "B15002_029",
                                     "B15002_030",
                                     "B15002_031",
                                     "B15002_032",
                                     "B15002_033",
                                     "B15002_034",
                                     "B15002_035"),
                       summary_var = "B15002_001",
                       state = state, county = county, year = year) %>%
    dplyr::group_by(GEOID, NAME) %>%
    dplyr::summarize(some_college = sum(estimate / summary_est))
}

census_median_age <- function(geo, year, state = NULL, county = NULL){
  library(tidycensus)
  library(tidyverse)
  median_age <- get_acs(geography = geo,
                        variables = c(median_age = "B01002_001"),
                        state = state, county = county, year = year, output = "wide") %>%
    dplyr::select(GEOID, median_age = median_ageE)
}



get_basic_census_stats <- function(geo, year, state = NULL, county = NULL){
  
  
  race_ethnicity <- census_race_ethnicity(geo = geo, state = state, county = county, year = year)
  
  income <- census_income(geo = geo, state = state, county = county, year = year)
  
  education <- census_education(geo = geo, state = state, county = county, year = year)
  
  
  median_age <- census_median_age(geo = geo, state = state, county = county, year = year)
  
  pd <- pop_dens(geo = geo, state = state, county = county, year = year)
  
  census <- list(race_ethnicity, income, education,
                 median_age, pd) %>%
    reduce(left_join)
  
  return(census)
}

library(tidycensus)
library(tidyverse)
library(data.table)

gcd <- function(s){
  library(data.table)
  cd <- rbindlist(lapply(c(2014, 2016, 2018, 2019), function(y){
    get_basic_census_stats("block group", y, state = s) |> 
      dplyr::mutate(year = y)
  }))
  saveRDS(cd, paste0("temp/state_census_bgs_", s, ".rds"))
}


cl <- makeCluster(12)  
registerDoParallel(cl)

clusterExport(cl, list("gcd", "get_basic_census_stats",
                       "census_race_ethnicity", "census_income", "census_education",
                       "census_median_age", "pop_dens"))

parLapply(cl, unique(filter(fips_codes, state_code <= "56")$state_code), fun = gcd)

tot <- rbindlist(lapply(list.files("temp/", pattern = "state_census_bgs_*", full.names = T), readRDS))

for(y in unique(tot$year)){
  saveRDS(filter(tot, year == y) |> 
            select(-year), paste0("temp/census_bgs_", y - 2000, ".rds"))
}
